#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import sys

# Append the parent of this file's directory to the module search path so that
# modules located there can be imported from this script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np
from scipy import signal


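# Build a two-class dataset of noisy sine-wave and square-wave sequences.
# dimension is the size of the input vector at each time step, length is the
# sequence length, number_of_examples is the number of samples,
# train_set_ratio is the fraction of all samples used for the training set,
# and seed is the random seed.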
def get_sequence_data(dimension=10, length=10,
                      number_of_examples=1000, train_set_ratio=0.7, seed=42):
    """
    Generate a two-class dataset of noisy sine-wave and square-wave sequences.

    Each example is a sequence of ``length`` time steps. At every time step
    the underlying waveform value is replicated across ``dimension`` features
    and independent Gaussian noise (mean 0, standard deviation 0.6) is added.
    Class 0 is the sine wave and class 1 is the square wave; labels are
    one-hot encoded.

    Args:
        dimension: Number of features per time step.
        length: Number of time steps per sequence.
        number_of_examples: Requested total number of examples. Each class
            receives ``number_of_examples // 2`` examples, so an odd request
            yields one example fewer in total.
        train_set_ratio: Fraction of ``number_of_examples`` placed in the
            training split.
        seed: Seed for the random number generator used for the noise and
            the shuffling. The same seed yields the same data; pass ``None``
            for a non-deterministic draw.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` where the ``x`` arrays
        have shape ``(n, length, dimension)`` and the ``y`` arrays have shape
        ``(n, 2)``.

    Raises:
        ValueError: If ``number_of_examples`` is smaller than 2, so that no
            example could be generated for either class.
    """
    examples_per_class = number_of_examples // 2
    if examples_per_class < 1:
        raise ValueError("number_of_examples must be at least 2")

    # A private generator makes the result depend only on `seed` and leaves
    # NumPy's global random state untouched.
    rng = np.random.RandomState(seed)

    # Sample the interval [0, 10) at exactly `length` points. Scaling an
    # integer range avoids the off-by-one element count that a float-step
    # np.arange can produce because of rounding.
    time_axis = np.arange(length) * (10.0 / length)
    waveforms = (np.sin(time_axis), signal.square(time_axis))
    number_of_classes = len(waveforms)

    blocks = []
    for class_index, waveform in enumerate(waveforms):
        # Draw the noise for every example of this class in one call.
        noise = rng.normal(0, 0.6, (examples_per_class, length, dimension))
        # Broadcast the waveform over examples and features, then add noise.
        sequences = waveform.reshape(1, -1, 1) + noise

        # One-hot labels for this class.
        labels = np.zeros((examples_per_class, number_of_classes))
        labels[:, class_index] = 1

        # One row per example: flattened sequence followed by its label.
        blocks.append(np.concatenate(
            [sequences.reshape(examples_per_class, -1), labels], axis=1))

    # Merge both classes and shuffle the rows so the classes are interleaved.
    data = np.concatenate(blocks, axis=0)
    rng.shuffle(data)

    train_set_size = int(number_of_examples * train_set_ratio)
    features = data[:, :-number_of_classes]
    labels = data[:, -number_of_classes:]

    # Split into train/test and restore the (examples, length, dimension) shape.
    return (features[:train_set_size].reshape(-1, length, dimension),
            labels[:train_set_size],
            features[train_set_size:].reshape(-1, length, dimension),
            labels[train_set_size:])
